rm(list=ls(all=TRUE))
library(foreign)
library(sandwich)
library(lmtest)

setwd("~/Dropbox/Apps/ShareLaTeX/Minimal Effects/replication/data")

master.sheet <- read.csv("master_sheet_input.csv", stringsAsFactors = FALSE)

##################
#HELPER FUNCTIONS#
##################

# Function to compute clustered standard errors, from Mahmood Arai.
cl <- function(fm, cluster){
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- fm$rank
  dfc <- (M/(M-1))*((N-1)/(N-K))
  uj  <- apply(estfun(fm), 2, function(x) tapply(x, cluster, sum))
  vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
  coeftest(fm, vcovCL)
}

analyze.data <- function(data, dv.t1, dv.t0, votechoice.t0, xvars) {
  data <- read.dta(data)
  data <- subset(data, data[,"respondent_t1"] == 1)
  t0.undecided <- prop.table(table(data[,votechoice.t0]==0))[2]
  percent.afam <- mean(data[,"t0_identify_afam"])
  percent.poc <- mean(data[,"t0_identify_poc"])
  t1.n <- nrow(data)
  percent.treat <- prop.table(table(data[,"treat_ind"]==1))[2]
  cor.dv <- cor(data[data$treat_ind==0,dv.t1], data[data$treat_ind==0,dv.t0])
  data.dv <- data[,dv.t1]
  # Rescale to mean 0 sd 1 in placebo group; treatment effects can then be interpreted
  # as the effect in standard deviations the treatment would have among an untreated population.
  data.dv <- (data.dv - mean(data.dv[!data$treat_ind], na.rm=TRUE)) /
              sd(data.dv[!data$treat_ind], na.rm=TRUE)
  t0.covariates.name <- unlist(strsplit(xvars, " "))
  x <- data[,c(t0.covariates.name)]
  x <- as.matrix(x, dimnames = list(NULL, names(x)))
  lm.obj.covars <- lm(data.dv ~ data$treat_ind + x)
  result.covars <- cl(lm.obj.covars, data$hh_id)["data$treat_ind",]
  lm.obj.nocovars <- lm(data.dv ~ data$treat_ind)
  result.nocovars <- cl(lm.obj.nocovars, data$hh_id)["data$treat_ind",]
  lm.obj.interact.covars.pid <- lm(data.dv ~ data$treat_ind:data$t0_pid + data$treat_ind + x)
  result.interact.covars.pid <- cl(lm.obj.interact.covars.pid, data$hh_id)["data$treat_ind:data$t0_pid",]
  lm.obj.interact.nocovars.pid <- lm(data.dv ~ data$treat_ind:data$t0_pid + data$treat_ind + data$t0_pid)
  result.interact.nocovars.pid <- cl(lm.obj.interact.nocovars.pid, data$hh_id)["data$treat_ind:data$t0_pid",]
  lm.obj.interact.covars.afam <- lm(data.dv ~ data$treat_ind:data$t0_identify_afam + data$treat_ind + x)
  result.interact.covars.afam <- cl(lm.obj.interact.covars.afam, data$hh_id)["data$treat_ind:data$t0_identify_afam",]
  lm.obj.interact.nocovars.afam <- lm(data.dv ~ data$treat_ind:data$t0_identify_afam + data$treat_ind + data$t0_identify_afam)
  result.interact.nocovars.afam <- cl(lm.obj.interact.nocovars.afam, data$hh_id)["data$treat_ind:data$t0_identify_afam",]
  lm.obj.interact.covars.poc <- lm(data.dv ~ data$treat_ind:data$t0_identify_poc + data$treat_ind + x)
  result.interact.covars.poc <- cl(lm.obj.interact.covars.poc, data$hh_id)["data$treat_ind:data$t0_identify_poc",]
  lm.obj.interact.nocovars.poc <- lm(data.dv ~ data$treat_ind:data$t0_identify_poc + data$treat_ind + data$t0_identify_poc)
  result.interact.nocovars.poc <- cl(lm.obj.interact.nocovars.poc, data$hh_id)["data$treat_ind:data$t0_identify_poc",]
  return(list(t1.n = t1.n, percent.treat = percent.treat,
              cor.dv = cor.dv, t0.undecided = t0.undecided, 
              percent.afam = percent.afam, percent.poc = percent.poc,
              results.covars = result.covars[c(1,2,4)], results.nocovars = result.nocovars[c(1,2,4)],
              results.pid.covars = result.interact.covars.pid[1:2], results.pid.nocovars = result.interact.nocovars.pid[1:2],
              results.afam.covars = result.interact.covars.afam[1:2], resuls.afam.nocovars = result.interact.nocovars.afam[1:2],
              results.poc.covars = result.interact.covars.poc[1:2], results.poc.nocovars = result.interact.nocovars.poc[1:2]))
}

turnout.effect <- function(data, turnout.dv, dv.t0, xvars) {
  data <- read.dta(data)
  t0.covariates.name <- unlist(strsplit(xvars, " "))
  x <- data[,c(t0.covariates.name)]
  x <- as.matrix(x, dimnames = list(NULL, names(x)))
  lm.obj.turnout <-  lm(data[,turnout.dv] ~ data$treat_ind + x)
  result.turnout <- cl(lm.obj.turnout, data$hh_id)["data$treat_ind",]
  data$baseline <- data[,dv.t0]
  lm.obj.interact.turnout.baseline <- lm(data[,turnout.dv] ~ data$treat_ind:data[,"baseline"] + data$treat_ind + x)
  result.interact.turnout.baseline <- cl(lm.obj.interact.turnout.baseline, data$hh_id)["data$treat_ind:data[, \"baseline\"]",]
  return(list(result.turnout = result.turnout[1:2], result.turnout.interact = result.interact.turnout.baseline[1:2])) 
}  

#data <- read.dta("MO_Canvass_2016.dta")
foo <- unlist(analyze.data(data = master.sheet[3,]$Data, 
                           dv.t1 = master.sheet[3,]$dv_t1, 
                           dv.t0 = master.sheet[3,]$dv_t0,
                           votechoice.t0 = master.sheet[3,]$votechoice_t0,
                           xvars = master.sheet[3,]$xvars))

foo.turnout <- unlist(turnout.effect(data = master.sheet[3,]$Data,
                           turnout.dv = master.sheet[3,]$voted16_var,
                           dv.t0 = master.sheet[3,]$dv_t0,
                           xvars = master.sheet[3,]$xvars))

foo <- unlist(analyze.data(data = master.sheet[15,]$Data, 
                           dv.t1 = master.sheet[15,]$dv_t1, 
                           dv.t0 = master.sheet[15,]$dv_t0,
                           votechoice.t0 = master.sheet[15,]$votechoice_t0,
                           xvars = master.sheet[15,]$xvars))

for(i in 1:15) {
  master.sheet[i,8:31] <- unlist(analyze.data(data = master.sheet[i,]$Data, 
                                      dv.t1 = master.sheet[i,]$dv_t1, 
                                      dv.t0 = master.sheet[i,]$dv_t0,
                                      votechoice.t0 = master.sheet[i,]$votechoice_t0,
                                      xvars = master.sheet[i,]$xvars))
}

for(i in 2:11) {
  master.sheet[i,32:35] <- unlist(turnout.effect(data = master.sheet[i,]$Data,
                                                 turnout.dv = master.sheet[i,]$voted16_var,
                                                 dv.t0 = master.sheet[i,]$dv_t0,
                                                 xvars = master.sheet[i,]$xvars))
}

#Manually remove PID interaction for Philly experiments
master.sheet$PID.Interaction.Covars[master.sheet$Experiment == "Philly_Primary_2015"] <- NA
master.sheet$PID.Interaction.SE.Covars[master.sheet$Experiment == "Philly_Primary_2015"] <- NA
master.sheet$PID.Interaction.No.Covars[master.sheet$Experiment == "Philly_Primary_2015"] <- NA
master.sheet$PID.Interaction.SE.No.Covars[master.sheet$Experiment == "Philly_Primary_2015"] <- NA

sum(1/master.sheet$Candidate.Effect.SE.with.Covars^2, na.rm = TRUE) * 13/9

write.csv(master.sheet, "master_sheet_output.csv")